/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    Copyright (C) 2008
 *    & Alexander Smirnov (austellus@gmail.com)
 */
package weka.clusterers;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Vector;

import weka.core.Capabilities;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.SparseInstance;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.core.Capabilities.Capability;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;

/**
 * <!-- globalinfo-start --> Yiling Yang, Xudong Guan, Jinyuan You: CLOPE: a
 * fast and effective clustering algorithm for transactional data. In:
 * Proceedings of the eighth ACM SIGKDD international conference on Knowledge
 * discovery and data mining, 682-687, 2002.
 * <p/>
 * <!-- globalinfo-end -->
 * 
 * <!-- technical-bibtex-start --> BibTeX:
 * 
 * <pre>
 * &#64;inproceedings{Yang2002,
 *    author = {Yiling Yang and Xudong Guan and Jinyuan You},
 *    booktitle = {Proceedings of the eighth ACM SIGKDD international conference on Knowledge discovery and data mining},
 *    pages = {682-687},
 *    publisher = {ACM  New York, NY, USA},
 *    title = {CLOPE: a fast and effective clustering algorithm for transactional data},
 *    year = {2002}
 * }
 * </pre>
 * <p/>
 * <!-- technical-bibtex-end -->
 * 
 * <!-- options-start --> Valid options are:
 * <p/>
 * 
 * <pre>
 * -R &lt;num&gt;
 *  Repulsion
 *  (default 2.6)
 * </pre>
 * 
 * <!-- options-end -->
 * 
 * @author Alexander Smirnov (austellus@gmail.com)
 * @version $Revision: 5538 $
 */
public class CLOPE extends AbstractClusterer implements OptionHandler,
		TechnicalInformationHandler {

	/** for serialization */
	static final long serialVersionUID = -567567567567588L;

	/**
	 * Inner class for cluster of CLOPE.
	 * 
	 * @see Serializable
	 */
	private class CLOPECluster implements Serializable {

		/**
		 * Number of transactions
		 */
		public int N = 0; // number of transactions

		/**
		 * Number of distinct items (or width)
		 */
		public int W = 0;

		/**
		 * Size of cluster
		 */
		public int S = 0;

		/**
		 * Hash of <item, occurrence> pairs
		 */
		public HashMap occ = new HashMap();

		/**
		 * Add item to cluster
		 */
		public void AddItem(String Item) {
			int count;
			if (!this.occ.containsKey(Item)) {
				this.occ.put(Item, 1);
			} else {
				count = (Integer) this.occ.get(Item);
				count++;
				this.occ.remove(Item);
				this.occ.put(Item, count);
			}
			this.S++;
		}

		public void AddItem(Integer Item) {
			int count;
			if (!this.occ.containsKey(Item)) {
				this.occ.put(Item, 1);
			} else {
				count = (Integer) this.occ.get(Item);
				count++;
				this.occ.remove(Item);
				this.occ.put(Item, count);
			}
			this.S++;
		}

		/**
		 * Delete item from cluster
		 */
		public void DeleteItem(String Item) {
			int count;

			count = (Integer) this.occ.get(Item);

			if (count == 1) {
				this.occ.remove(Item);

			} else {
				count--;
				this.occ.remove(Item);
				this.occ.put(Item, count);
			}
			this.S--;
		}

		public void DeleteItem(Integer Item) {
			int count;

			count = (Integer) this.occ.get(Item);

			if (count == 1) {
				this.occ.remove(Item);

			} else {
				count--;
				this.occ.remove(Item);
				this.occ.put(Item, count);
			}
			this.S--;
		}

		/**
		 * Calculate Delta
		 */
		public double DeltaAdd(Instance inst, double r) {
			// System.out.println("DeltaAdd");
			int S_new;
			int W_new;
			double profit;
			double profit_new;
			double deltaprofit;
			S_new = 0;
			W_new = occ.size();

			if (inst instanceof SparseInstance) {
				// System.out.println("DeltaAddSparceInstance");
				for (int i = 0; i < inst.numValues(); i++) {
					S_new++;

					if ((Integer) this.occ.get(inst.index(i)) == null) {
						W_new++;
					}
				}
			} else {
				for (int i = 0; i < inst.numAttributes(); i++) {
					if (!inst.isMissing(i)) {
						S_new++;
						if ((Integer) this.occ.get(i + inst.toString(i)) == null) {
							W_new++;
						}
					}
				}
			}
			S_new += S;

			if (N == 0) {
				deltaprofit = S_new / Math.pow(W_new, r);
			} else {
				profit = S * N / Math.pow(W, r);
				profit_new = S_new * (N + 1) / Math.pow(W_new, r);
				deltaprofit = profit_new - profit;
			}
			return deltaprofit;
		}

		/**
		 * Add instance to cluster
		 */
		public void AddInstance(Instance inst) {
			if (inst instanceof SparseInstance) {
				// System.out.println("AddSparceInstance");
				for (int i = 0; i < inst.numValues(); i++) {
					AddItem(inst.index(i));
					// for(int i=0;i<inst.numAttributes();int++){
					// AddItem(inst.index(i)+inst.value(i));
				}
			} else {
				for (int i = 0; i < inst.numAttributes(); i++) {

					if (!inst.isMissing(i)) {

						AddItem(i + inst.toString(i));
					}
				}
			}
			this.W = this.occ.size();
			this.N++;
		}

		/**
		 * Delete instance from cluster
		 */
		public void DeleteInstance(Instance inst) {
			if (inst instanceof SparseInstance) {
				// System.out.println("DeleteSparceInstance");
				for (int i = 0; i < inst.numValues(); i++) {
					DeleteItem(inst.index(i));
				}
			} else {
				for (int i = 0; i <= inst.numAttributes() - 1; i++) {

					if (!inst.isMissing(i)) {
						DeleteItem(i + inst.toString(i));
					}
				}
			}
			this.W = this.occ.size();
			this.N--;
		}
	}

	/**
	 * Array of clusters
	 */
	public ArrayList<CLOPECluster> clusters = new ArrayList<CLOPECluster>();

	/**
	 * Specifies the repulsion default
	 */
	protected double m_RepulsionDefault = 2.6;

	/**
	 * Specifies the repulsion
	 */
	protected double m_Repulsion = m_RepulsionDefault;

	/**
	 * Number of clusters
	 */
	protected int m_numberOfClusters = -1;

	/**
	 * Counter for the processed instances
	 */
	protected int m_processed_InstanceID;

	/**
	 * Number of instances
	 */
	protected int m_numberOfInstances;

	/**
   * 
   */
	protected ArrayList<Integer> m_clusterAssignments = new ArrayList();

	/**
	 * whether the number of clusters was already determined
	 */
	protected boolean m_numberOfClustersDetermined = false;

	public int numberOfClusters() {
		determineNumberOfClusters();
		return m_numberOfClusters;
	}

	protected void determineNumberOfClusters() {

		m_numberOfClusters = clusters.size();

		m_numberOfClustersDetermined = true;
	}

	public Enumeration listOptions() {
		Vector result = new Vector();
		result.addElement(new Option("\tRepulsion\n" + "\t(default "
				+ m_RepulsionDefault + ")", "R", 1, "-R <num>"));
		return result.elements();
	}

	/**
	 * Parses a given list of options.
	 * <p/>
	 * 
	 * <!-- options-start --> Valid options are:
	 * <p/>
	 * 
	 * <pre>
	 * -R &lt;num&gt;
	 *  Repulsion
	 *  (default 2.6)
	 * </pre>
	 * 
	 * <!-- options-end -->
	 * 
	 * @param options
	 *            the list of options as an array of strings
	 * @throws Exception
	 *             if an option is not supported
	 */
	public void setOptions(String[] options) throws Exception {
		String tmpStr;

		tmpStr = Utils.getOption('R', options);
		if (tmpStr.length() != 0) {
			setRepulsion(Double.parseDouble(tmpStr));
		} else {
			setRepulsion(m_RepulsionDefault);
		}
	}

	/**
	 * Gets the current settings of CLOPE
	 * 
	 * @return an array of strings suitable for passing to setOptions()
	 */
	public String[] getOptions() {
		Vector result;

		result = new Vector();

		result.add("-R");
		result.add("" + getRepulsion());

		return (String[]) result.toArray(new String[result.size()]);
	}

	/**
	 * Returns the tip text for this property
	 * 
	 * @return tip text for this property suitable for displaying in the
	 *         explorer/experimenter gui
	 */
	public String repulsionTipText() {
		return "Repulsion to be used.";
	}

	/**
	 * set the repulsion
	 * 
	 * @param value
	 *            the repulsion
	 * @throws Exception
	 *             if number of clusters is negative
	 */
	public void setRepulsion(double value) {
		m_Repulsion = value;
	}

	/**
	 * gets the repulsion
	 * 
	 * @return the repulsion
	 */
	public double getRepulsion() {
		return m_Repulsion;
	}

	/**
	 * Returns default capabilities of the clusterer.
	 * 
	 * @return the capabilities of this clusterer
	 */
	public Capabilities getCapabilities() {
		Capabilities result = super.getCapabilities();
		result.disableAll();
		result.enable(Capability.NO_CLASS);

		// attributes
		result.enable(Capability.NOMINAL_ATTRIBUTES);
		// result.enable(Capability.NUMERIC_ATTRIBUTES);
		result.enable(Capability.MISSING_VALUES);

		return result;
	}

	/**
	 * Generate Clustering via CLOPE
	 * 
	 * @param instances
	 *            The instances that need to be clustered
	 * @throws java.lang.Exception
	 *             If clustering was not successful
	 */
	public void buildClusterer(Instances data) throws Exception {
		clusters.clear();
		m_processed_InstanceID = 0;
		m_clusterAssignments.clear();
		m_numberOfInstances = data.numInstances();
		boolean moved;
		// Phase 1
		for (int i = 0; i < data.numInstances(); i++) {
			int clusterid = AddInstanceToBestCluster(data.instance(i));
			m_clusterAssignments.add(clusterid);

		}
		// Phase 2
		do {
			moved = false;
			for (int i = 0; i < data.numInstances(); i++) {
				m_processed_InstanceID = i;
				int clusterid = MoveInstanceToBestCluster(data.instance(i));
				if (clusterid != m_clusterAssignments.get(i)) {
					moved = true;
					m_clusterAssignments.set(i, clusterid);
				}
			}
		} while (!moved);
		m_processed_InstanceID = 0;
	}

	/**
	 * the default constructor
	 */
	public CLOPE() {
		super();
	}

	/**
	 * Add instance to best cluster
	 */
	public int AddInstanceToBestCluster(Instance inst) {

		double delta;
		double deltamax;
		int clustermax = -1;
		if (clusters.size() > 0) {
			int tempS = 0;
			int tempW = 0;
			if (inst instanceof SparseInstance) {
				for (int i = 0; i < inst.numValues(); i++) {
					tempS++;
					tempW++;
				}
			} else {
				for (int i = 0; i < inst.numAttributes(); i++) {
					if (!inst.isMissing(i)) {
						tempS++;
						tempW++;
					}
				}
			}

			deltamax = tempS / Math.pow(tempW, m_Repulsion);

			for (int i = 0; i < clusters.size(); i++) {
				CLOPECluster tempcluster = clusters.get(i);
				delta = tempcluster.DeltaAdd(inst, m_Repulsion);
				// System.out.println("delta " + delta);
				if (delta > deltamax) {
					deltamax = delta;
					clustermax = i;
				}
			}
		} else {
			CLOPECluster newcluster = new CLOPECluster();
			clusters.add(newcluster);
			newcluster.AddInstance(inst);
			return clusters.size() - 1;
		}

		if (clustermax == -1) {
			CLOPECluster newcluster = new CLOPECluster();
			clusters.add(newcluster);
			newcluster.AddInstance(inst);
			return clusters.size() - 1;
		}
		clusters.get(clustermax).AddInstance(inst);
		return clustermax;
	}

	/**
	 * Move instance to best cluster
	 */
	public int MoveInstanceToBestCluster(Instance inst) {

		clusters.get(m_clusterAssignments.get(m_processed_InstanceID))
				.DeleteInstance(inst);
		m_clusterAssignments.set(m_processed_InstanceID, -1);
		double delta;
		double deltamax;
		int clustermax = -1;
		int tempS = 0;
		int tempW = 0;

		if (inst instanceof SparseInstance) {
			for (int i = 0; i < inst.numValues(); i++) {
				tempS++;
				tempW++;
			}
		} else {
			for (int i = 0; i < inst.numAttributes(); i++) {
				if (!inst.isMissing(i)) {
					tempS++;
					tempW++;
				}
			}
		}

		deltamax = tempS / Math.pow(tempW, m_Repulsion);
		for (int i = 0; i < clusters.size(); i++) {
			CLOPECluster tempcluster = clusters.get(i);
			delta = tempcluster.DeltaAdd(inst, m_Repulsion);
			// System.out.println("delta " + delta);
			if (delta > deltamax) {
				deltamax = delta;
				clustermax = i;
			}
		}
		if (clustermax == -1) {
			CLOPECluster newcluster = new CLOPECluster();
			clusters.add(newcluster);
			newcluster.AddInstance(inst);
			return clusters.size() - 1;
		}
		clusters.get(clustermax).AddInstance(inst);
		return clustermax;
	}

	/**
	 * Classifies a given instance.
	 * 
	 * @param instance
	 *            The instance to be assigned to a cluster
	 * @return int The number of the assigned cluster as an integer
	 * @throws java.lang.Exception
	 *             If instance could not be clustered successfully
	 */
	public int clusterInstance(Instance instance) throws Exception {
		if (m_processed_InstanceID >= m_numberOfInstances) {
			m_processed_InstanceID = 0;
		}
		int i = m_clusterAssignments.get(m_processed_InstanceID);
		m_processed_InstanceID++;
		return i;
	}

	/**
	 * return a string describing this clusterer
	 * 
	 * @return a description of the clusterer as a string
	 */
	public String toString() {
		StringBuffer stringBuffer = new StringBuffer();
		stringBuffer
				.append("CLOPE clustering results\n"
						+ "========================================================================================\n\n");
		stringBuffer.append("Clustered instances: "
				+ m_clusterAssignments.size() + "\n");
		return stringBuffer.toString() + "\n";
	}

	/**
	 * Returns a string describing this DataMining-Algorithm
	 * 
	 * @return String Information for the gui-explorer
	 */
	public String globalInfo() {
		return getTechnicalInformation().toString();
	}

	/**
	 * Returns an instance of a TechnicalInformation object, containing detailed
	 * information about the technical background of this class, e.g., paper
	 * reference or book this class is based on.
	 * 
	 * @return the technical information about this class
	 */
	public TechnicalInformation getTechnicalInformation() {
		TechnicalInformation result;

		result = new TechnicalInformation(Type.INPROCEEDINGS);
		result.setValue(Field.AUTHOR,
				"Yiling Yang and Xudong Guan and Jinyuan You");
		result.setValue(Field.TITLE,
				"CLOPE: a fast and effective clustering algorithm for transactional data");
		result.setValue(
				Field.BOOKTITLE,
				"Proceedings of the eighth ACM SIGKDD international conference on Knowledge discovery and data mining");
		result.setValue(Field.YEAR, "2002");
		result.setValue(Field.PAGES, "682-687");
		result.setValue(Field.PUBLISHER, "ACM  New York, NY, USA");

		return result;
	}

	/**
	 * Returns the revision string.
	 * 
	 * @return the revision
	 */
	public String getRevision() {
		return RevisionUtils.extract("$Revision: 5538 $");
	}

	/**
	 * Main method for testing this class.
	 * 
	 * @param argv
	 *            should contain the following arguments:
	 *            <p>
	 *            -t training file [-R repulsion]
	 */
	public static void main(String[] argv) {
		runClusterer(new CLOPE(), argv);
	}
}
